Modelando la serie de la temperatura

La serie de la temperatura fue extraída de la base de datos del IDEAM, se tuvieron en cuenta los promedios diarios de las temperaturas en grados centigrados (°C) registradas en BogotÔ en las diferentes estaciones meteorológicas que recolectaron información de esos días. La serie de tiempo cuenta con un total de 1826 registros, de los cuales 13 (0.7%) fueron imputados puesto que no se presentaba la información necesaria. Esta imputación fue realizada a partir del método de vecinos mÔs cercanos (KNN), donde se tuvierón en cuenta 5 vecinos.

Para el modelar la serie de la temperatura se usara inicialmente un modelo SARIMA, seguido de redes recurrentes simples, GRU y finalmente GRU. Para seleccionar el mejor modelo se tomarƔ como criterio el error cuadrƔtico medio.

Red Neuronal Recurrente GRU

Importación de Datos

In [ ]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

import sklearn as sk
from sklearn import impute
from sklearn import preprocessing
import sklearn.externals
import joblib
from sklearn.model_selection import TimeSeriesSplit
from sklearn.impute import KNNImputer
import sklearn.preprocessing

from keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
import time
import sklearn.externals
import joblib
import plotly.graph_objects as go
from sklearn import metrics

import statsmodels.api as sm
import statsmodels.tsa.stattools as ts
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from statsmodels.tsa.statespace.sarimax import SARIMAX

import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
import tensorflow.keras.backend as K

%matplotlib inline
In [ ]:
import plotly.io as pio
pio.renderers.default='notebook'
In [ ]:
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
In [ ]:
df = pd.read_csv("/content/drive/Shareddrives/Mineria /Temperatura1.csv", sep=';', header=0, decimal = ',')
Fecha = pd.date_range(start='2017-01-01', end='2021-12-31', freq='D')
df['Fecha'] = Fecha
df = df.set_index('Fecha')

print(df[pd.isnull(df.ValorObservado)])
print('En total hay' ,
      str(df['ValorObservado'].isnull().sum()) ,
      'valores sin información')
print('Correspondientes al {:.3f}% del total'
      .format(df['ValorObservado'].isnull().sum()*100/len(df)))
            ValorObservado
Fecha                     
2017-08-12             NaN
2017-12-24             NaN
2019-09-15             NaN
2019-09-16             NaN
2019-09-17             NaN
2020-11-12             NaN
2021-01-05             NaN
2021-01-06             NaN
2021-01-07             NaN
2021-01-08             NaN
2021-08-18             NaN
2021-08-20             NaN
2021-12-05             NaN
En total hay 13 valores sin información
Correspondientes al 0.712% del total

La serie presenta valores faltantes, por lo tantol se imputaran usando el método de vecinos mÔs cercanos (KNN), como se muestra a continuación.

Imputación a partir del vecino mÔs cercano

In [ ]:
#Imputación de Valores usando el vecino mÔs cercano
imput = KNNImputer(n_neighbors=5, weights="uniform")

# Ajustamos el modelo e imputamos los missing values
imput.fit(df[['ValorObservado']])
df['ValorObservado'] = imput.transform(df[['ValorObservado']]).ravel()
print()
print("Valores pƩrdidos en ValorObservado: " , 
      str(df['ValorObservado'].isnull().sum()))
Valores pƩrdidos en ValorObservado:  0
In [ ]:
fig = px.line(df, x=df.index, y="ValorObservado")
fig.update_xaxes(title_text="Fecha")
fig.show()

Separación de datos entrenamiento y prueba

Para el respectivo anƔlisis se tomarƔn el 80% de los datos para entrenamiento y validacion, el 20% restantes para prueba, dichos valores corresponden a 1460 y 365 respectivamente.

In [ ]:
from sklearn.preprocessing import MinMaxScaler
# crea el objeto  scaler y escala los datos
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df.values)
#
df_norm = pd.DataFrame(scaled_data,index=df.index, columns=['ValorObservadoNormalizado'])
In [ ]:
train_size = int(len(df_norm) * 0.8)
test_size = len(df_norm) - train_size
train, test = df_norm.iloc[0:train_size], df_norm.iloc[train_size:len(df_norm)]
len_train = len(train)
len_test = len(test)
print(len_train, len_test)
1460 366
In [ ]:
def create_dataset(X, y, time_steps=1):
    # crea dos listas vacias para depositar los datos
    Xs, ys = [], []
    # el primer lote de datos empieza en la primera observación
    # y toma time_steps  datos.
    # Comienza a avanzar hacia adelante.
    for i in range(len(X) - time_steps):
        v = X.iloc[i:(i + time_steps)].values
        Xs.append(v)
        ys.append(y.iloc[i + time_steps])
    return np.array(Xs), np.array(ys)
In [ ]:
time_steps = 50

# reshape to [samples, time_steps, n_features]

X_train, y_train = create_dataset(train, train, time_steps)
X_test, y_test = create_dataset(test, test, time_steps)

print("X_train.shape = ", X_train.shape)
print("y_train.shape = ", y_train.shape)
print("X_test.shape = ", X_test.shape)
print("y_test.shape = ", y_test.shape)
X_train.shape =  (1410, 50, 1)
y_train.shape =  (1410, 1)
X_test.shape =  (316, 50, 1)
y_test.shape =  (316, 1)
In [ ]:
fig = px.line(df_norm, x=df.index, y='ValorObservadoNormalizado')
fig.update_xaxes(title_text="Fecha")
fig.update_yaxes(title_text="ValorObservadoNormalizado")
fig.show()

Modelo (1 paso adelante)

In [ ]:
# shapes
inputs_shape = (X_train.shape[1], X_train.shape[2])
GRU_output = 60

# layers
inputs = L.Input(inputs_shape)
gru = L.GRU(units=GRU_output, name='GRU')(inputs)
outputs_GRU = L.Dense(1)(gru)

GRU_model = Model(inputs=inputs, outputs=outputs_GRU, name='series_GRU_model')

# Compiling the RNN
GRU_model.compile(loss="mean_squared_error", optimizer=Adam(0.001))
# Fitting to the training set
start = time.time()
GRU = GRU_model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=16,
    validation_split=0.1,
    verbose=1,
    shuffle=False
)
print("compilation time : ", time.time() - start)
Epoch 1/50
80/80 [==============================] - 3s 17ms/step - loss: 0.0380 - val_loss: 0.0041
Epoch 2/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0083 - val_loss: 0.0041
Epoch 3/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0072 - val_loss: 0.0039
Epoch 4/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0066 - val_loss: 0.0036
Epoch 5/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0062 - val_loss: 0.0034
Epoch 6/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0060 - val_loss: 0.0033
Epoch 7/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0060 - val_loss: 0.0032
Epoch 8/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0059 - val_loss: 0.0032
Epoch 9/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0059 - val_loss: 0.0031
Epoch 10/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0059 - val_loss: 0.0031
Epoch 11/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0059 - val_loss: 0.0031
Epoch 12/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 13/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 14/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 15/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 16/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 17/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 18/50
80/80 [==============================] - 1s 18ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 19/50
80/80 [==============================] - 1s 18ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 20/50
80/80 [==============================] - 1s 18ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 21/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 22/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 23/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 24/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 25/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 26/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 27/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 28/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 29/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 30/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 31/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 32/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 33/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 34/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 35/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0030
Epoch 36/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0030
Epoch 37/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0030
Epoch 38/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0030
Epoch 39/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0030
Epoch 40/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0030
Epoch 41/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0058 - val_loss: 0.0030
Epoch 42/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0030
Epoch 43/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0058 - val_loss: 0.0030
Epoch 44/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0057 - val_loss: 0.0030
Epoch 45/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 46/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 47/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 48/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 49/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 50/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0057 - val_loss: 0.0031
compilation time :  57.67443633079529
In [ ]:
joblib.dump(GRU_model,'/content/drive/Shareddrives/Mineria /GRU')
WARNING:absl:Found untraced functions such as gru_cell_layer_call_fn, gru_cell_layer_call_and_return_conditional_losses while saving (showing 2 of 2). These functions will not be directly callable after loading.
Out[ ]:
['/content/drive/Shareddrives/Mineria /GRU']
In [ ]:
GRU_model.summary()
Model: "series_GRU_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 50, 1)]           0         
                                                                 
 GRU (GRU)                   (None, 60)                11340     
                                                                 
 dense (Dense)               (None, 1)                 61        
                                                                 
=================================================================
Total params: 11,401
Trainable params: 11,401
Non-trainable params: 0
_________________________________________________________________
In [ ]:
GRU_losses = pd.DataFrame(GRU.history)
fig = px.line(GRU_losses, x=GRU_losses.index, y=["loss", "val_loss"])
fig.update_xaxes(title_text="Epoch")
fig.update_yaxes(title_text="Loss")
fig.show()

Predicciones (1 paso adelante)

In [ ]:
GRU_Predict = GRU_model.predict(X_test)
GRU_Predict = scaler.inverse_transform(GRU_Predict)
10/10 [==============================] - 1s 4ms/step
In [ ]:
# plot y_train, y_test, and testPredict using plotly
seq_len=50
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df.index[seq_len : len(y_train) + seq_len],
        y=scaler.inverse_transform(y_train).ravel(),
        mode="lines",
        name="Entrenamiento",
    )
)
fig.add_trace(
    go.Scatter(
        x=df.index[len(y_train) + seq_len :],
        y=scaler.inverse_transform(y_test).ravel(),
        mode="lines",
        name="Prueba",
    )
)
fig.add_trace(
    go.Scatter(
        x=df.index[len(y_train) + seq_len :],
        y=GRU_Predict.ravel(),
        mode="lines",
        name="Predicción",
    )
)
fig.update_xaxes(title_text="Fecha")
fig.update_yaxes(title_text="ValorObservado")
fig.show()

Intervalos de Confianza (1 paso adelante)

In [ ]:
def QuantileLoss(perc, delta=1e-4):
    perc = np.array(perc).reshape(-1)
    perc.sort()
    perc = perc.reshape(1, -1)
    def _qloss(y, pred):
        I = tf.cast(y <= pred, tf.float32)
        d = K.abs(y - pred)
        correction = I * (1 - perc) + (1 - I) * perc
        # huber loss
        huber_loss = K.sum(correction * tf.where(d <= delta, 0.5 * d ** 2 / delta, d - 0.5 * delta), -1)
        # order loss
        q_order_loss = K.sum(K.maximum(0.0, pred[:, :-1] - pred[:, 1:] + 1e-6), -1)
        return huber_loss + q_order_loss
    return _qloss
In [ ]:
# quantiles
perc_points = [0.025, 0.975]

# shapes
inputs_shape = (X_train.shape[1], X_train.shape[2])
GRU_output = 60

# layers
inputs = L.Input(inputs_shape)
qgru = L.GRU(units=GRU_output, name='GRU')(inputs)
qoutputs_GRU = L.Dense(2)(qgru)

qGRU_model = Model(inputs=inputs, outputs=qoutputs_GRU, name='series_GRU_model')

# Compiling the RNN
qGRU_model.compile(Adam(0.001), loss=QuantileLoss(perc_points))
# Fitting to the training set
start = time.time()
qGRU = qGRU_model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=16,
    validation_split=0.1,
    verbose=1,
    shuffle=False
)
print("compilation time : ", time.time() - start)
Epoch 1/50
80/80 [==============================] - 3s 18ms/step - loss: 0.0723 - val_loss: 0.0125
Epoch 2/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0138 - val_loss: 0.0084
Epoch 3/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0124 - val_loss: 0.0079
Epoch 4/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0121 - val_loss: 0.0078
Epoch 5/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0118 - val_loss: 0.0078
Epoch 6/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0113 - val_loss: 0.0074
Epoch 7/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0111 - val_loss: 0.0072
Epoch 8/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0109 - val_loss: 0.0071
Epoch 9/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0108 - val_loss: 0.0073
Epoch 10/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0107 - val_loss: 0.0075
Epoch 11/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0107 - val_loss: 0.0073
Epoch 12/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0108 - val_loss: 0.0072
Epoch 13/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0106 - val_loss: 0.0073
Epoch 14/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0106 - val_loss: 0.0069
Epoch 15/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0106 - val_loss: 0.0067
Epoch 16/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0105 - val_loss: 0.0070
Epoch 17/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0105 - val_loss: 0.0071
Epoch 18/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0106 - val_loss: 0.0070
Epoch 19/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0105 - val_loss: 0.0072
Epoch 20/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0067
Epoch 21/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0105 - val_loss: 0.0071
Epoch 22/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0070
Epoch 23/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0104 - val_loss: 0.0067
Epoch 24/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0105 - val_loss: 0.0069
Epoch 25/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0105 - val_loss: 0.0068
Epoch 26/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0105 - val_loss: 0.0068
Epoch 27/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0069
Epoch 28/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0105 - val_loss: 0.0070
Epoch 29/50
80/80 [==============================] - 1s 15ms/step - loss: 0.0104 - val_loss: 0.0068
Epoch 30/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0068
Epoch 31/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0067
Epoch 32/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0105 - val_loss: 0.0069
Epoch 33/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0068
Epoch 34/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0066
Epoch 35/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0066
Epoch 36/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0103 - val_loss: 0.0068
Epoch 37/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0103 - val_loss: 0.0067
Epoch 38/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0068
Epoch 39/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0069
Epoch 40/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0068
Epoch 41/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0103 - val_loss: 0.0067
Epoch 42/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0069
Epoch 43/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0103 - val_loss: 0.0069
Epoch 44/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0103 - val_loss: 0.0067
Epoch 45/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0103 - val_loss: 0.0068
Epoch 46/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0067
Epoch 47/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0103 - val_loss: 0.0069
Epoch 48/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0103 - val_loss: 0.0070
Epoch 49/50
80/80 [==============================] - 1s 13ms/step - loss: 0.0102 - val_loss: 0.0070
Epoch 50/50
80/80 [==============================] - 1s 14ms/step - loss: 0.0104 - val_loss: 0.0073
compilation time :  57.54221177101135
In [ ]:
qGRU_model.summary()
Model: "series_GRU_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_2 (InputLayer)        [(None, 50, 1)]           0         
                                                                 
 GRU (GRU)                   (None, 60)                11340     
                                                                 
 dense_1 (Dense)             (None, 2)                 122       
                                                                 
=================================================================
Total params: 11,462
Trainable params: 11,462
Non-trainable params: 0
_________________________________________________________________
In [ ]:
qGRU_losses = pd.DataFrame(qGRU.history)
fig = px.line(qGRU_losses, x=qGRU_losses.index, y=["loss", "val_loss"])
fig.update_xaxes(title_text="Epoch")
fig.update_yaxes(title_text="Loss")
fig.show()
In [ ]:
qGRU_Predict = qGRU_model.predict(X_test)
qGRU_Predict = scaler.inverse_transform(qGRU_Predict)
10/10 [==============================] - 0s 4ms/step
In [ ]:
# plot y_train, y_test, and testPredict using plotly
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df.index[seq_len : len(y_train) + seq_len],
        y=scaler.inverse_transform(y_train).ravel(),
        mode="lines",
        name="Entrenamiento",
    )
)
fig.add_trace(
    go.Scatter(
        x=df.index[len(y_train) + seq_len :],
        y=GRU_Predict.ravel(),
        mode="lines",
        name="Predicción",
    )
)
fig.add_trace(
    go.Scatter(
        x=df.index[len(y_train) + seq_len :],
        y=qGRU_Predict[:,0] ,
        mode="lines",
        name="0.025",
    )
)
fig.add_trace(
    go.Scatter(
        x=df.index[len(y_train) + seq_len :],
        y=qGRU_Predict[:,1] ,
        mode="lines",
        name="0.975",
    )
)
fig.update_xaxes(title_text="Fecha")
fig.update_yaxes(title_text="ValorObservado")
fig.show()

Error CuadrƔtico Medio (1 paso adelante)

In [ ]:
from sklearn.metrics import mean_squared_error
GRU_Score1 = mean_squared_error(scaler.inverse_transform(y_test), GRU_Predict)**.5
print('Test Score: %.2f RMSE' % (GRU_Score1))
Test Score: 0.83 RMSE

Predicción 5 pasos adelante

In [ ]:
def create_dataset(X, y, time_steps=1):
    # crea dos listas vacias para depositar los datos
    Xs, ys = [], []
    # el primer lote de datos empieza en la primera observación
    # y toma time_steps  datos.
    # Comienza a avanzar hacia adelante.
    for i in range(len(X) - time_steps-5):
        v = X.iloc[i:(i + time_steps)].values
        Xs.append(v)
        ys.append(y.iloc[i + time_steps+5])
    return np.array(Xs), np.array(ys)
In [ ]:
time_steps = 50

# reshape to [samples, time_steps, n_features]

X_train_5p, y_train_5p = create_dataset(train, train, time_steps)
X_test_5p, y_test_5p = create_dataset(test, test, time_steps)

print("X_train.shape = ", X_train_5p.shape)
print("y_train.shape = ", y_train_5p.shape)
print("X_test.shape = ", X_test_5p.shape)
print("y_test.shape = ", y_test_5p.shape)
X_train.shape =  (1405, 50, 1)
y_train.shape =  (1405, 1)
X_test.shape =  (311, 50, 1)
y_test.shape =  (311, 1)
In [ ]:
# shapes
inputs_shape = (X_train_5p.shape[1], X_train_5p.shape[2])
GRU_output_5p = 60

# layers
inputs_5p = L.Input(inputs_shape)
gru_5p = L.GRU(units=GRU_output_5p, name='GRU')(inputs_5p)
outputs_GRU_5p = L.Dense(1)(gru_5p)

GRU_model_5p = Model(inputs=inputs_5p, outputs=outputs_GRU_5p, name='series_GRU_model')

# Compiling the RNN
GRU_model_5p.compile(optimizer=Adam(0.001),loss="mean_squared_error")
# Fitting to the training set
start = time.time()
GRU_5p = GRU_model_5p.fit(
    X_train_5p,
    y_train_5p,
    epochs=50,
    batch_size=16,
    validation_split=0.1,
    verbose=1,
    shuffle=False
)
print("compilation time : ", time.time() - start)
Epoch 1/50
79/79 [==============================] - 2s 13ms/step - loss: 0.0231 - val_loss: 0.0051
Epoch 2/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0121 - val_loss: 0.0053
Epoch 3/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0121 - val_loss: 0.0054
Epoch 4/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0121 - val_loss: 0.0054
Epoch 5/50
79/79 [==============================] - 1s 10ms/step - loss: 0.0121 - val_loss: 0.0054
Epoch 6/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0120 - val_loss: 0.0054
Epoch 7/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0120 - val_loss: 0.0054
Epoch 8/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0120 - val_loss: 0.0054
Epoch 9/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0120 - val_loss: 0.0054
Epoch 10/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0119 - val_loss: 0.0054
Epoch 11/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0119 - val_loss: 0.0053
Epoch 12/50
79/79 [==============================] - 1s 10ms/step - loss: 0.0119 - val_loss: 0.0053
Epoch 13/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0119 - val_loss: 0.0053
Epoch 14/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0119 - val_loss: 0.0053
Epoch 15/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0118 - val_loss: 0.0053
Epoch 16/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0118 - val_loss: 0.0053
Epoch 17/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0118 - val_loss: 0.0053
Epoch 18/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0118 - val_loss: 0.0053
Epoch 19/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0118 - val_loss: 0.0053
Epoch 20/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0118 - val_loss: 0.0053
Epoch 21/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0118 - val_loss: 0.0053
Epoch 22/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 23/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 24/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 25/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 26/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 27/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 28/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 29/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 30/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 31/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 32/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 33/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0117 - val_loss: 0.0053
Epoch 34/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0053
Epoch 35/50
79/79 [==============================] - 1s 10ms/step - loss: 0.0116 - val_loss: 0.0053
Epoch 36/50
79/79 [==============================] - 1s 10ms/step - loss: 0.0116 - val_loss: 0.0053
Epoch 37/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0053
Epoch 38/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0053
Epoch 39/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0053
Epoch 40/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0054
Epoch 41/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0054
Epoch 42/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0054
Epoch 43/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0054
Epoch 44/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0054
Epoch 45/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0054
Epoch 46/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0054
Epoch 47/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0054
Epoch 48/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0054
Epoch 49/50
79/79 [==============================] - 1s 10ms/step - loss: 0.0116 - val_loss: 0.0054
Epoch 50/50
79/79 [==============================] - 1s 9ms/step - loss: 0.0116 - val_loss: 0.0055
compilation time :  37.954171895980835
In [ ]:
joblib.dump(GRU_model_5p,'/content/drive/Shareddrives/Mineria /GRU_r')
WARNING:absl:Found untraced functions such as gru_cell_2_layer_call_fn, gru_cell_2_layer_call_and_return_conditional_losses while saving (showing 2 of 2). These functions will not be directly callable after loading.
Out[ ]:
['/content/drive/Shareddrives/Mineria /GRU_r']
In [ ]:
GRU_model_5p.summary()
Model: "series_GRU_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_3 (InputLayer)        [(None, 50, 1)]           0         
                                                                 
 GRU (GRU)                   (None, 60)                11340     
                                                                 
 dense_2 (Dense)             (None, 1)                 61        
                                                                 
=================================================================
Total params: 11,401
Trainable params: 11,401
Non-trainable params: 0
_________________________________________________________________
In [ ]:
GRU_losses_5p = pd.DataFrame(GRU_5p.history)
fig = px.line(GRU_losses_5p, x=GRU_losses_5p.index, y=["loss", "val_loss"])
fig.update_xaxes(title_text="Epoch")
fig.update_yaxes(title_text="Loss")
fig.show()
In [ ]:
GRU_Predict_5p = GRU_model.predict(X_test_5p )
GRU_Predict_5p = scaler.inverse_transform(GRU_Predict_5p)
10/10 [==============================] - 0s 4ms/step
In [ ]:
# plot y_train, y_test, and testPredict using plotly
seq_len = 50
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df.index[seq_len : len(y_train_5p) + seq_len],
        y=scaler.inverse_transform(y_train_5p).ravel(),
        mode="lines",
        name="Entrenamiento",
    )
)
fig.add_trace(
    go.Scatter(
        x=df.index[len(y_train_5p) + seq_len :],
        y=scaler.inverse_transform(y_test_5p).ravel(),
        mode="lines",
        name="Prueba",
    )
)
fig.add_trace(
    go.Scatter(
        x=df.index[len(y_train_5p) + seq_len :],
        y=GRU_Predict_5p.ravel(),
        mode="lines",
        name="Predicción",
    )
)
fig.update_xaxes(title_text="Fecha")
fig.update_yaxes(title_text="ValorObservado")
fig.show()
In [ ]:
GRU_Score2 = metrics.mean_squared_error(scaler.inverse_transform(y_test_5p), GRU_Predict_5p)
print('Test Score: %.2f RMSE' % (GRU_Score2))
Test Score: 1.52 RMSE

Tomando mƔs retardos

In [ ]:
def create_dataset(X, y, time_steps=1):
    # crea dos listas vacias para depositar los datos
    Xs, ys = [], []
    # el primer lote de datos empieza en la primera observación
    # y toma time_steps  datos.
    # Comienza a avanzar hacia adelante.
    for i in range(len(X) - time_steps):
        v = X.iloc[i:(i + time_steps)].values
        Xs.append(v)
        ys.append(y.iloc[i + time_steps])
    return np.array(Xs), np.array(ys)
In [ ]:
time_steps1 = 100

# reshape to [samples, time_steps, n_features]

X_train_r, y_train_r = create_dataset(train, train, time_steps1)
X_test_r, y_test_r = create_dataset(test, test, time_steps1)

print("X_train.shape = ", X_train_r.shape)
print("y_train.shape = ", y_train_r.shape)
print("X_test.shape = ", X_test_r.shape)
print("y_test.shape = ", y_test_r.shape)
X_train.shape =  (1360, 100, 1)
y_train.shape =  (1360, 1)
X_test.shape =  (266, 100, 1)
y_test.shape =  (266, 1)
In [ ]:
# shapes
inputs_shape = (X_train_r.shape[1], X_train_r.shape[2])
GRU_output_r = 60

# layers
inputs_r = L.Input(inputs_shape)
gru_r = L.GRU(units=GRU_output_r, name='GRU')(inputs_r)
outputs_GRU_r = L.Dense(1)(gru_r)

GRU_model_r = Model(inputs=inputs_r, outputs=outputs_GRU_r, name='series_GRU_model')

# Compiling the RNN
GRU_model_r.compile(optimizer=Adam(0.001),loss="mean_squared_error")
# Fitting to the training set
start = time.time()
GRU_r = GRU_model_r.fit(
    X_train_r,
    y_train_r,
    epochs=50,
    batch_size=16,
    validation_split=0.1,
    verbose=1,
    shuffle=False
)
print("compilation time : ", time.time() - start)
Epoch 1/50
77/77 [==============================] - 4s 28ms/step - loss: 0.0409 - val_loss: 0.0044
Epoch 2/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0090 - val_loss: 0.0043
Epoch 3/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0079 - val_loss: 0.0041
Epoch 4/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0071 - val_loss: 0.0038
Epoch 5/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0066 - val_loss: 0.0035
Epoch 6/50
77/77 [==============================] - 2s 25ms/step - loss: 0.0063 - val_loss: 0.0034
Epoch 7/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0062 - val_loss: 0.0033
Epoch 8/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0061 - val_loss: 0.0032
Epoch 9/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0060 - val_loss: 0.0032
Epoch 10/50
77/77 [==============================] - 2s 23ms/step - loss: 0.0059 - val_loss: 0.0032
Epoch 11/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0059 - val_loss: 0.0031
Epoch 12/50
77/77 [==============================] - 2s 25ms/step - loss: 0.0059 - val_loss: 0.0031
Epoch 13/50
77/77 [==============================] - 2s 23ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 14/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 15/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 16/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 17/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 18/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 19/50
77/77 [==============================] - 2s 23ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 20/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 21/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 22/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 23/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 24/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 25/50
77/77 [==============================] - 2s 25ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 26/50
77/77 [==============================] - 2s 25ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 27/50
77/77 [==============================] - 2s 25ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 28/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 29/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 30/50
77/77 [==============================] - 2s 25ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 31/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0058 - val_loss: 0.0031
Epoch 32/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 33/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 34/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 35/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 36/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 37/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 38/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 39/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 40/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 41/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 42/50
77/77 [==============================] - 2s 25ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 43/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 44/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 45/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 46/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 47/50
77/77 [==============================] - 2s 25ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 48/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 49/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
Epoch 50/50
77/77 [==============================] - 2s 24ms/step - loss: 0.0057 - val_loss: 0.0031
compilation time :  94.71847724914551
In [ ]:
joblib.dump(GRU_model_5p,'/content/drive/Shareddrives/Mineria /GRU_r')
WARNING:absl:Found untraced functions such as gru_cell_2_layer_call_fn, gru_cell_2_layer_call_and_return_conditional_losses while saving (showing 2 of 2). These functions will not be directly callable after loading.
Out[ ]:
['/content/drive/Shareddrives/Mineria /GRU_r']
In [ ]:
GRU_model_r.summary()
Model: "series_GRU_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_4 (InputLayer)        [(None, 100, 1)]          0         
                                                                 
 GRU (GRU)                   (None, 60)                11340     
                                                                 
 dense_3 (Dense)             (None, 1)                 61        
                                                                 
=================================================================
Total params: 11,401
Trainable params: 11,401
Non-trainable params: 0
_________________________________________________________________
In [ ]:
GRU_losses_r = pd.DataFrame(GRU_r.history)
fig = px.line(GRU_losses_r, x=GRU_losses_r.index, y=["loss", "val_loss"])
fig.update_xaxes(title_text="Epoch")
fig.update_yaxes(title_text="Loss")
fig.show()
In [ ]:
GRU_Predict_r = GRU_model_r.predict(X_test_r)
GRU_Predict_r = scaler.inverse_transform(GRU_Predict_r)
9/9 [==============================] - 0s 7ms/step
In [ ]:
# plot y_train, y_test, and testPredict using plotly
seq_len1 = 100
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df.index[seq_len1 : len(y_train_r) + seq_len1],
        y=scaler.inverse_transform(y_train_r).ravel(),
        mode="lines",
        name="Entrenamiento",
    )
)
fig.add_trace(
    go.Scatter(
        x=df.index[len(y_train_r) + seq_len1 :],
        y=scaler.inverse_transform(y_test_r).ravel(),
        mode="lines",
        name="Prueba",
    )
)
fig.add_trace(
    go.Scatter(
        x=df.index[len(y_train_r) + seq_len1 :],
        y=GRU_Predict_r.ravel(),
        mode="lines",
        name="Predicción",
    )
)
fig.update_xaxes(title_text="Fecha")
fig.update_yaxes(title_text="ValorObservado")
fig.show()
In [ ]:
GRU_Score3 = metrics.mean_squared_error(scaler.inverse_transform(y_test_r), GRU_Predict_r)
print('Test Score: %.2f RMSE' % (GRU_Score3))
Test Score: 0.65 RMSE

Comparación

In [ ]:
print('Modelo GRU 1 paso adelante 50 retardos %f' % (GRU_Score1))
print('Modelo GRU 5 paso adelante 50 retardos %f' % (GRU_Score2))
print('Modelo GRU 1 paso adelante 100 retardos %f' % (GRU_Score3))
Modelo GRU 1 paso adelante 50 retardos 0.832429
Modelo GRU 5 paso adelante 50 retardos 1.523124
Modelo GRU 1 paso adelante 100 retardos 0.650262